library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
font_add_google("Lato", "lato")
showtext_auto()
babynames <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-22/babynames.csv')
## Rows: 1924665 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): sex, name
## dbl (3): year, n, prop
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
babynames_letter <- babynames %>%
mutate(name = tolower(name)) %>%
group_by(year, sex, name, n) %>%
summarise(letter = unlist(str_split(name, '')))
## `summarise()` has grouped output by 'year', 'sex', 'name', 'n'. You can override using the `.groups` argument.
babynames_letter %>%
group_by(sex, letter) %>%
summarise(
n = sum(n)
) %>%
group_by(sex) %>%
mutate(
prop = n/sum(n)
) %>%
ggplot() +
geom_col(aes(letter, prop, fill = sex), position = 'dodge')
## `summarise()` has grouped output by 'sex'. You can override using the `.groups` argument.

plot_dat <- babynames_letter %>%
mutate(sex = ifelse(sex == 'F', 'Female', 'Male')) %>%
group_by(year, sex, letter) %>%
summarise(
n = sum(n)
) %>%
group_by(year, sex) %>%
mutate(
prop = n/sum(n)
) %>%
filter((letter %in% c('a', 'e', 'i', 'o', 'u')))
## `summarise()` has grouped output by 'year', 'sex'. You can override using the `.groups` argument.
lab_let <- plot_dat %>%
group_by(sex, letter) %>%
filter(year == max(year) | year == min(year)) %>%
mutate(year = ifelse(year == max(year), year+2, year-2))
plot_dat %>%
ggplot() +
geom_line(aes(year, prop, color = sex, group = interaction(sex, letter))) +
geom_text(aes(year, prop, color = sex, group = letter, label = letter),
data = lab_let) +
scale_y_sqrt() +
theme_few()

set.seed(1990)
plot_dat <- babynames_letter %>%
mutate(sex = ifelse(sex == 'F', 'Female', 'Male')) %>%
group_by(year, sex, letter) %>%
summarise(
n = sum(n)
) %>%
group_by(year, sex) %>%
mutate(
prop = n/sum(n)
) %>%
# filter(!(letter %in% c('a', 'e', 'i', 'o', 'u'))) %>%
mutate(letter = letter %>% toupper()) %>%
ungroup() %>%
mutate(letter = factor(letter, sample(LETTERS)))
## `summarise()` has grouped output by 'year', 'sex'. You can override using the `.groups` argument.
lab_let <- plot_dat %>%
group_by(sex, letter) %>%
filter(year == max(year) | year == min(year)) %>%
mutate(year = ifelse(year == max(year), year, year))
plot_dat %>%
ggplot() +
geom_line(aes(year, prop, color = letter,
group = interaction(sex, letter))) +
geom_text_repel(aes(year, prop, color = letter, label = letter),
data = filter(lab_let, year == max(year)),
direction = "y", hjust = "left", nudge_x = 30,
max.overlaps = Inf, min.segment.length = 0,
segment.color = 'black', seed = 0, size = 4) +
geom_text_repel(aes(year, prop, color = letter, label = letter),
data = filter(lab_let, year == min(year)),
direction = "y", hjust = "left", nudge_x = -30,
max.overlaps = Inf, min.segment.length = 0,
segment.color = 'black', seed = 1, size = 4) +
scale_y_sqrt(
breaks = c(0.001, 0.007, seq(0.02, 0.2, 0.02)),
sec.axis = dup_axis()
) +
scale_x_continuous(
expand = expansion(mult = 0.3),
breaks = seq(1880, 2020, 20)
) +
theme_few() +
facet_wrap(~sex) +
theme(
legend.position = 'none'
)+
ylab('Proportion') +
xlab('Year') +
labs(
title = 'The popularity of the letters used in newborn baby names has changed over the years',
caption = '#TidyTuesday (2022, w12) | Data: babynames R package | Graph by @irg_bio'
)+
theme(
text = element_text(family = 'lato'),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)

ggsave('final_graph.pdf', height = 5, width = 12)
plt_fun <- function(highlight) {
plot_dat %>%
ggplot() +
geom_line(aes(year, prop, color = letter,
size = letter %in% highlight,
alpha = letter %in% highlight,
group = interaction(sex, letter))) +
geom_text_repel(aes(year, prop, color = letter, label = letter,
alpha = letter %in% highlight),
data = filter(lab_let, year == max(year)),
direction = "y", hjust = "left", nudge_x = 30,
max.overlaps = Inf, min.segment.length = 0,
segment.color = 'black', seed = 0, size = 4
) +
geom_text_repel(aes(year, prop, color = letter, label = letter,
alpha = letter %in% highlight),
data = filter(lab_let, year == min(year)),
direction = "y", hjust = "left", nudge_x = -30,
max.overlaps = Inf, min.segment.length = 0,
segment.color = 'black', seed = 1, size = 4) +
scale_y_sqrt(
breaks = c(0, 0.001, 0.007, seq(0.02, 0.2, 0.02)),
sec.axis = dup_axis()
) +
scale_x_continuous(
expand = expansion(mult = 0.3),
breaks = seq(1880, 2020, 20)
) +
theme_few() +
facet_wrap(~sex) +
theme(
legend.position = 'none'
) +
scale_size_discrete(range = c(0.5, 1)) +
scale_alpha_discrete(range = c(0.3, 1)) +
ylab('Proportion') +
xlab('Year')
}
plt_fun(c('A', 'E', 'I', 'O', 'U')) +
labs(
title = 'The vowels',
caption = '#TidyTuesday (2022, w12) | Data: babynames R package | Graph by @irg_bio'
) +
theme(
text = element_text(family = 'lato'),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)
## Warning: Using size for a discrete variable is not advised.
## Warning: Using alpha for a discrete variable is not advised.

ggsave('vowels.pdf', height = 5, width = 12)
babynames %>%
filter(year == '1950', sex == 'F', str_detect(tolower(name), 'k')) %>%
mutate(sum = sum(prop))
## # A tibble: 284 × 6
## year sex name n prop sum
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1950 F Kathleen 25704 0.0146 0.0590
## 2 1950 F Karen 24139 0.0137 0.0590
## 3 1950 F Kathy 9089 0.00517 0.0590
## 4 1950 F Kathryn 7864 0.00447 0.0590
## 5 1950 F Katherine 6204 0.00353 0.0590
## 6 1950 F Vicki 5827 0.00331 0.0590
## 7 1950 F Vickie 3284 0.00187 0.0590
## 8 1950 F Kay 2626 0.00149 0.0590
## 9 1950 F Jackie 1276 0.000726 0.0590
## 10 1950 F Kristine 1247 0.000709 0.0590
## # … with 274 more rows
babynames %>%
filter(year == '1950', sex == 'F')
## # A tibble: 6,111 × 5
## year sex name n prop
## <dbl> <chr> <chr> <dbl> <dbl>
## 1 1950 F Linda 80432 0.0457
## 2 1950 F Mary 65482 0.0372
## 3 1950 F Patricia 47945 0.0273
## 4 1950 F Barbara 41557 0.0236
## 5 1950 F Susan 38018 0.0216
## 6 1950 F Nancy 29618 0.0168
## 7 1950 F Deborah 29064 0.0165
## 8 1950 F Sandra 28895 0.0164
## 9 1950 F Carol 26165 0.0149
## 10 1950 F Kathleen 25704 0.0146
## # … with 6,101 more rows
plt_fun(c('F', 'S', 'O')) +
labs(
title = 'The rise and fall of letters',
caption = '#TidyTuesday (2022, w12) | Data: babynames R package | Graph by @irg_bio'
) +
theme(
text = element_text(family = 'lato'),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)
## Warning: Using size for a discrete variable is not advised.
## Warning: Using alpha for a discrete variable is not advised.

ggsave('peaks.pdf', height = 5, width = 12)
plt_fun(c('R', 'W')) +
labs(
title = 'Decreasing trends',
caption = '#TidyTuesday (2022, w12) | Data: babynames R package | Graph by @irg_bio'
) +
theme(
text = element_text(family = 'lato'),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)
## Warning: Using size for a discrete variable is not advised.
## Warning: Using alpha for a discrete variable is not advised.

ggsave('decreasing.pdf', height = 5, width = 12)
plt_fun(c('K', 'X', 'N')) +
labs(
title = 'The newbies',
caption = '#TidyTuesday (2022, w12) | Data: babynames R package | Graph by @irg_bio'
) +
theme(
text = element_text(family = 'lato'),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)
## Warning: Using size for a discrete variable is not advised.
## Warning: Using alpha for a discrete variable is not advised.

ggsave('increasing.pdf', height = 5, width = 12)